# =============================================================================
# APPENDIX J FIGURE 14A: KIM & SUDDUTH ROBUSTNESS ANALYSIS
# Leave-one-out jackknife analysis for Kim & Sudduth replication
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(dplyr)
library(haven)
library(sandwich)
library(ggplot2)
library(reshape2)
library(lmtest)
library(stargazer)

# --- DATA LOADING ------------------------------------------------------------
data <- read_dta("kim_sudduth.dta")
est <- read.csv("estimates_independent.csv")

# --- DATA PREPARATION --------------------------------------------------------
# Ensure consistent identifiers for merging
data$COWcode <- data$cowcode

# Merge latent estimates with original dataset
data_new <- inner_join(est, data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = lag(dyn.estimates, n = 1),
    dyn.up_lag = lag(dyn.up, n = 1),
    dyn.lo_lag = lag(dyn.lo, n = 1)
  ) %>%
  ungroup()

# Create exact sample matching for models
data_new_exact <- data_new %>% filter(!is.na(dyn.estimates_lag))

data_used_in_m2new <- data_new_exact %>% 
  filter(complete.cases(dyn.estimates_lag, anycoupatt_fin, gwf_leadermil, 
                        ll_rgdpe_PW, lag_growth_rgdpe_PW, ll_pop_PW, postcold, anyattyrs))

# --- HELPER FUNCTIONS --------------------------------------------------------
# Function to get clustered standard errors
get_clustered_se <- function(model, cluster_var) {
  vcov_cluster <- vcovHC(model, type = "HC1", cluster = cluster_var)
  coeftest(model, vcov_cluster)
}

# Leave-one-out robustness function
leave_one_out <- function(model_formula, data) {
  n <- nrow(data)
  coef_results <- numeric(n)
  
  for (i in 1:n) {
    temp_data <- data[-i, ]
    model <- glm(model_formula, data = temp_data, family = "binomial")
    coef_results[i] <- coef(model)["dyn.estimates_lag"]
  }
  
  return(coef_results)
}

# Function to plot and compare coefficients
compare_coefficients <- function(coef_vector, original_coef, model_name) {
  h <- hist(coef_vector, main = paste("", model_name),
            xlab = "Coefficient Estimate",
            col = "gray", border = "white", breaks = 20)
  
  abline(v = original_coef, col = "blue", lwd = 2, lty = 2)
  
  y_max <- max(h$counts)
  text(x = original_coef, y = y_max * 0.9, 
       labels = paste0("Full Sample:\n", round(original_coef, 4)),
       col = "blue", pos = 4, cex = 0.8, xpd = TRUE)
  
  cat(paste0("\n", model_name, ":\n"))
  cat("Original coefficient: ", round(original_coef, 4), "\n")
  cat("Mean of leave-one-out coefficients: ", round(mean(coef_vector), 4), "\n")
  cat("Min of leave-one-out coefficients: ", round(min(coef_vector), 4), "\n")
  cat("Max of leave-one-out coefficients: ", round(max(coef_vector), 4), "\n")
  cat("Range of leave-one-out coefficients: ", round(range(coef_vector), 4), "\n")
}

# --- ANALYSIS ----------------------------------------------------------------
# Fit baseline models
m2restricted <- glm(anycoupatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), 
                    data = data_used_in_m2new, family = "binomial")

m5restricted <- glm(reshuffleatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), 
                    data = data_used_in_m2new, family = "binomial")

m8restricted <- glm(rechangeatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW + 
                      lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), 
                    data = data_used_in_m2new, family = "binomial")

# New models with latent measure
m2new <- glm(anycoupatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), 
             data = data_new, family = "binomial")

m5new <- glm(reshuffleatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), 
             data = data_new, family = "binomial")

m8new <- glm(rechangeatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
               lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), 
             data = data_new, family = "binomial")

# Generate clustered standard errors table
cluster_var <- data_new$cowcode
stargazer(m2restricted, m2new, m5restricted, m5new, m8restricted, m8new, 
          type = "latex",
          se = list(
            sqrt(diag(vcovHC(m2restricted, type = "HC1", cluster = data_used_in_m2new$cowcode))),
            sqrt(diag(vcovHC(m2new, type = "HC1", cluster = data_new$cowcode))),
            sqrt(diag(vcovHC(m5restricted, type = "HC1", cluster = data_used_in_m2new$cowcode))),
            sqrt(diag(vcovHC(m5new, type = "HC1", cluster = data_new$cowcode))),
            sqrt(diag(vcovHC(m8restricted, type = "HC1", cluster = data_used_in_m2new$cowcode))),
            sqrt(diag(vcovHC(m8new, type = "HC1", cluster = data_new$cowcode)))
          ),
          column.labels = c("Restricted", "Replication", "Restricted", "Replication", 
                            "Restricted", "Replication"),
          star.cutoffs = c(0.1, 0.05, 0.01), 
          notes.append = FALSE)


# Leave-one-out analysis
coef_m2new <- leave_one_out(anycoupatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3), data_new)

coef_m5new <- leave_one_out(reshuffleatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3), data_new)

coef_m8new <- leave_one_out(rechangeatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW + 
                              lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3), data_new)

# Extract original coefficients
orig_coef_m2new <- coef(m2new)["dyn.estimates_lag"]
orig_coef_m5new <- coef(m5new)["dyn.estimates_lag"]
orig_coef_m8new <- coef(m8new)["dyn.estimates_lag"]

# --- VISUALIZATION -----------------------------------------------------------
par(mfrow = c(1, 3))
compare_coefficients(coef_m2new, orig_coef_m2new, "All Coups")
compare_coefficients(coef_m5new, orig_coef_m5new, "Reshuffling Coups")
compare_coefficients(coef_m8new, orig_coef_m8new, "Regime-Change Coups")
par(mfrow = c(1, 1))